We will try to quantify DNA abundance in the nucleus (between n and 2n) for the Williams high content screen.
In [1]:
%matplotlib inline
%cd ~/Data/williams
from matplotlib import pyplot as plt
from skimage import io
import os
In [2]:
ims = io.imread_collection('*.tif')
fns = filter(lambda x: x.endswith('.tif'),
sorted(os.listdir('.')))
In [3]:
fns[:6]
Out[3]:
Let's figure out which channel is which:
In [5]:
from matplotlib import cm
from husc import preprocess as pre
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, im in zip(axes, ims):
ax.imshow(pre.stretchlim(im, 0.01, 0.99), cmap=cm.cubehelix)
Looks like channel 2 is the nucleus but still a bit ambiguous...! Let's try the next lot of images:
In [6]:
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, im in zip(axes, ims[3:6]):
ax.imshow(pre.stretchlim(im, 0.01, 0.99), cmap=cm.cubehelix)
Ok let's try the candidate nucleus image from the next lot:
In [7]:
fig = plt.figure(figsize=(10, 10))
plt.imshow(pre.stretchlim(ims[7], 0.01, 0.99), cmap=cm.cubehelix)
Out[7]:
Let's try to get a peak for each nucleus. Eyeballing it, a nucleus has radius about 10 or 15. Let's try 10 for a gaussian blur.
In [8]:
nuclei = ims[1::3]
from scipy import ndimage as nd
radius = 10
gnuclei = [nd.gaussian_filter(im, radius) for im in nuclei]
from skimage import feature
detect = [feature.peak_local_max(g, min_distance=radius, indices=False)
for g in gnuclei]
In [9]:
from skimage.morphology import binary_dilation, disk
detectw = [binary_dilation(d, disk(3)) for d in detect]
In [10]:
figure, axes = plt.subplots(3, 2, figsize=(15, 30))
for ax, im, d in zip(axes.ravel(), nuclei, detectw):
ax.imshow(im, cmap=cm.gray)
ax.imshow(d, cmap=cm.jet, interpolation='nearest', alpha=0.5)
That's pretty good detection. Now, let's try to find one segment per nucleus by thresholding the image and applying watershed.
In [29]:
import numpy as np
from scipy import ndimage as nd
from skimage.morphology import watershed
from skimage.filter import threshold_otsu
nucleus_masks = [(im > threshold_otsu(im)) for
im in nuclei]
nuclei_objs = [watershed(-im, nd.label(seeds)[0], mask=mask) for
im, seeds, mask in
zip(nuclei, detectw, nucleus_masks)]
from skimage.segmentation import find_boundaries
nuclei_edges = map(find_boundaries, nuclei_objs)
In [18]:
nuclei[0].max()
Out[18]:
In [30]:
viz = []
for im, edge in zip(nuclei, nuclei_edges):
r = g = (255 * edge).astype(np.uint8)
b = (255 * im.astype(np.float) / im.max()).astype(np.uint8)
viz.append(np.dstack((r, g, b)))
In [31]:
figure, axes = plt.subplots(3, 2, figsize=(15, 30))
for ax, im in zip(axes.ravel(), viz):
ax.imshow(im, interpolation='nearest')
In [32]:
import vis
figure, axes = plt.subplots(3, 2, figsize=(15, 30))
for ax, im in zip(axes.ravel(), nuclei_objs):
vis.sshow(im, ax=ax)
In [44]:
from skimage.measure import regionprops
propss = [regionprops(im, intensity_image=iim)
for im, iim in zip(nuclei_objs, nuclei)]
def total_intensity(props):
return props.mean_intensity * props.area
nuclei_intensities = [map(total_intensity, pr)
for pr in propss]
In [45]:
total_intensity(propss[0][0])
Out[45]:
In [46]:
figure, axes = plt.subplots(3, 2, figsize=(15, 30))
for ax, ints in zip(axes.ravel(), nuclei_intensities):
ax.hist(ints)
In [48]:
import itertools as it
all_values = list(it.chain(*nuclei_intensities))
plt.hist(all_values)
Out[48]: